# Load libraries
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(ggplot2)
library(dplyr)
df <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\25_Portfolios_5x5.csv", header = TRUE, skip = 15)
df <- df[1:1146,]
names(df)[names(df) == "X"] <- "Date"
df <- as.data.frame(lapply(df, as.numeric))
df$Date <- parse_date_time(df$Date, "ym")
tail(df)
## Date SMALL.LoBM ME1.BM2 ME1.BM3 ME1.BM4 SMALL.HiBM ME2.BM1 ME2.BM2
## 1141 2021-07-01 -10.1722 -7.6929 -3.6097 -4.7531 -1.5492 -7.4616 -3.2841
## 1142 2021-08-01 2.5319 5.5794 4.0683 1.8348 3.9693 1.9219 2.3791
## 1143 2021-09-01 -5.1961 -4.3855 -3.3635 -0.8425 0.1511 -5.3511 -4.1155
## 1144 2021-10-01 2.0093 -0.3933 1.0377 1.8838 4.1799 2.1078 2.5007
## 1145 2021-11-01 -8.7691 -6.8659 -4.1992 -3.8271 -3.4686 -7.9499 -3.8268
## 1146 2021-12-01 -6.6488 0.3345 1.9093 2.3951 3.3515 -0.2830 0.0193
## ME2.BM3 ME2.BM4 ME2.BM5 ME3.BM1 ME3.BM2 ME3.BM3 ME3.BM4 ME3.BM5 ME4.BM1
## 1141 -1.7334 -2.1717 -6.0351 -2.5013 -1.1614 -1.0348 -1.1665 -4.5054 -0.7491
## 1142 1.2297 1.5206 2.4331 1.9087 1.4109 1.8187 1.8091 3.5476 3.2077
## 1143 -3.6209 -0.4379 3.3170 -3.8808 -4.0114 -2.8858 -4.3101 1.9898 -4.3556
## 1144 3.8364 4.6983 3.0187 4.7347 3.5965 3.9967 3.9258 4.2605 4.7589
## 1145 0.7390 -3.0060 -4.1636 -8.5724 -2.4178 -2.3881 -2.6836 -3.9083 -6.4840
## 1146 6.0397 6.3250 4.9877 0.1028 4.4464 4.3764 5.2353 4.4539 0.5204
## ME4.BM2 ME4.BM3 ME4.BM4 ME4.BM5 BIG.LoBM ME5.BM2 ME5.BM3 ME5.BM4 BIG.HiBM
## 1141 1.8909 -1.2862 1.3608 -4.2888 3.3015 3.1933 0.4040 -1.5415 -3.4379
## 1142 1.5669 1.6435 3.2307 5.0117 3.3117 4.1280 0.5554 1.2868 4.3102
## 1143 -5.0464 -2.1698 -4.5869 1.4868 -5.2780 -6.2358 -4.3843 -0.7534 0.6721
## 1144 5.5583 5.4486 4.5498 4.4312 8.9941 5.6401 0.8190 5.5387 7.5280
## 1145 -1.9543 -2.9812 -1.3566 -4.6413 1.2334 -3.0009 -3.0145 -4.5284 -6.6413
## 1146 5.9283 5.3941 6.1848 3.9667 2.1199 5.2625 6.9025 5.8952 1.6612
names(df) <- gsub("\\.", "_", names(df))
names(df)
## [1] "Date" "SMALL_LoBM" "ME1_BM2" "ME1_BM3" "ME1_BM4"
## [6] "SMALL_HiBM" "ME2_BM1" "ME2_BM2" "ME2_BM3" "ME2_BM4"
## [11] "ME2_BM5" "ME3_BM1" "ME3_BM2" "ME3_BM3" "ME3_BM4"
## [16] "ME3_BM5" "ME4_BM1" "ME4_BM2" "ME4_BM3" "ME4_BM4"
## [21] "ME4_BM5" "BIG_LoBM" "ME5_BM2" "ME5_BM3" "ME5_BM4"
## [26] "BIG_HiBM"
df_FF <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\F-F_Research_Data_Factors.csv", header = TRUE, skip = 3)
df_FF <- df_FF[1:1146,]
names(df_FF)[names(df_FF) == "X"] <- "Date"
names(df_FF)[names(df_FF) == "Mkt.RF"] <- "Mkt_RF"
df_FF <- as.data.frame(lapply(df_FF, as.numeric))
df_FF$Date <- parse_date_time(df_FF$Date, "ym")
tail(df_FF)
## Date Mkt_RF SMB HML RF
## 1141 2021-07-01 1.27 -3.96 -1.75 0.00
## 1142 2021-08-01 2.90 -0.48 -0.13 0.00
## 1143 2021-09-01 -4.37 0.80 5.09 0.00
## 1144 2021-10-01 6.65 -2.28 -0.44 0.00
## 1145 2021-11-01 -1.55 -1.35 -0.53 0.00
## 1146 2021-12-01 3.10 -1.58 3.22 0.01
df_combined <- left_join(df, df_FF, by = "Date")
tail(df_combined)
## Date SMALL_LoBM ME1_BM2 ME1_BM3 ME1_BM4 SMALL_HiBM ME2_BM1 ME2_BM2
## 1141 2021-07-01 -10.1722 -7.6929 -3.6097 -4.7531 -1.5492 -7.4616 -3.2841
## 1142 2021-08-01 2.5319 5.5794 4.0683 1.8348 3.9693 1.9219 2.3791
## 1143 2021-09-01 -5.1961 -4.3855 -3.3635 -0.8425 0.1511 -5.3511 -4.1155
## 1144 2021-10-01 2.0093 -0.3933 1.0377 1.8838 4.1799 2.1078 2.5007
## 1145 2021-11-01 -8.7691 -6.8659 -4.1992 -3.8271 -3.4686 -7.9499 -3.8268
## 1146 2021-12-01 -6.6488 0.3345 1.9093 2.3951 3.3515 -0.2830 0.0193
## ME2_BM3 ME2_BM4 ME2_BM5 ME3_BM1 ME3_BM2 ME3_BM3 ME3_BM4 ME3_BM5 ME4_BM1
## 1141 -1.7334 -2.1717 -6.0351 -2.5013 -1.1614 -1.0348 -1.1665 -4.5054 -0.7491
## 1142 1.2297 1.5206 2.4331 1.9087 1.4109 1.8187 1.8091 3.5476 3.2077
## 1143 -3.6209 -0.4379 3.3170 -3.8808 -4.0114 -2.8858 -4.3101 1.9898 -4.3556
## 1144 3.8364 4.6983 3.0187 4.7347 3.5965 3.9967 3.9258 4.2605 4.7589
## 1145 0.7390 -3.0060 -4.1636 -8.5724 -2.4178 -2.3881 -2.6836 -3.9083 -6.4840
## 1146 6.0397 6.3250 4.9877 0.1028 4.4464 4.3764 5.2353 4.4539 0.5204
## ME4_BM2 ME4_BM3 ME4_BM4 ME4_BM5 BIG_LoBM ME5_BM2 ME5_BM3 ME5_BM4 BIG_HiBM
## 1141 1.8909 -1.2862 1.3608 -4.2888 3.3015 3.1933 0.4040 -1.5415 -3.4379
## 1142 1.5669 1.6435 3.2307 5.0117 3.3117 4.1280 0.5554 1.2868 4.3102
## 1143 -5.0464 -2.1698 -4.5869 1.4868 -5.2780 -6.2358 -4.3843 -0.7534 0.6721
## 1144 5.5583 5.4486 4.5498 4.4312 8.9941 5.6401 0.8190 5.5387 7.5280
## 1145 -1.9543 -2.9812 -1.3566 -4.6413 1.2334 -3.0009 -3.0145 -4.5284 -6.6413
## 1146 5.9283 5.3941 6.1848 3.9667 2.1199 5.2625 6.9025 5.8952 1.6612
## Mkt_RF SMB HML RF
## 1141 1.27 -3.96 -1.75 0.00
## 1142 2.90 -0.48 -0.13 0.00
## 1143 -4.37 0.80 5.09 0.00
## 1144 6.65 -2.28 -0.44 0.00
## 1145 -1.55 -1.35 -0.53 0.00
## 1146 3.10 -1.58 3.22 0.01
df_combined <- df_combined %>%
mutate(MKT_RF_DIFF = Mkt_RF - RF)
names_1 <- c()
r_squared_1 <- c()
realized_return_1 <- c()
pred_return_1 <- c()
reg_1 <- c()
for (i in 2:26) {
reg_1[[i]] <- summary(lm(as.matrix(df_combined[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(RF), data = df_combined))
names_1[[i]] <- colnames(df_combined[i])
print(ggplot(data = df_combined, aes(x = Date)) +
geom_line(aes(y = df_combined[ , i] - RF, color="Realized Returns")) +
geom_line(aes(y = as.vector(reg_1[[i]]$coefficients[2,1]) * MKT_RF_DIFF, color="Predicted Returns")) +
labs(y = "Excess Returns", colour = "Legend") +
ggtitle(names_1[[i]]) +
theme(
plot.title = element_text(size = 15, face = "bold", hjust = 0.5, vjust = 2.5),
axis.title.x = element_text(size = 12, face = "bold", margin = margin(t = 10)),
axis.title.y = element_text(size = 12, face = "bold", margin = margin(r = 10)),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11),
legend.title = element_text(size = 12, face = "bold"),
legend.text = element_text(size = 10.5),
legend.key.height = unit(1, 'cm'),
legend.key.width = unit(1.5, 'cm'),
legend.position = "right", legend.margin=margin(l = 30)) +
guides(color = guide_legend(override.aes = list(size = 1.5)))
)
pred_return_1[[i]] <- mean(reg_1[[i]]$coefficients[2,1] * df_combined$MKT_RF_DIFF)
realized_return_1[[i]] <- mean(df_combined[ , i] - df_combined$RF)
r_squared_1[[i]] <- reg_1[[i]]$adj.r.squared
}
pred_return_1 <- as.data.frame(as.matrix(pred_return_1))
pred_return_1 <- pred_return_1[-c(1),]
pred_return_1 <- as.data.frame(as.list(pred_return_1), col.names = names(df_combined[2:26]), row.names = "Predicted_Returns")
realized_return_1 <- as.data.frame(as.matrix(realized_return_1))
realized_return_1 <- realized_return_1[-c(1),]
realized_return_1 <- as.data.frame(as.list(realized_return_1), col.names = names(df_combined[2:26]), row.names = "Realized_Returns")
realized_vs_pred_return_1 <- rbind(pred_return_1, realized_return_1)
realized_vs_pred_return_1 <- as.data.frame(t(realized_vs_pred_return_1))
realized_vs_pred_return_1 <- realized_vs_pred_return_1 %>%
add_column(Group = c('A', 'B', 'B', 'B', 'A', 'C', 'C', 'C', 'C', 'C', 'D', 'D', 'D', 'D', 'D', 'E', 'E', 'E', 'E', 'E', 'A', 'F', 'F', 'F', 'A'), .after="Realized_Returns")
realized_vs_pred_return_1[,1:2]
## Predicted_Returns Realized_Returns
## SMALL_LoBM 0.6857270 0.6044773
## ME1_BM2 0.5967542 0.7300789
## ME1_BM3 0.5947202 1.0124432
## ME1_BM4 0.5383386 1.1812958
## SMALL_HiBM 0.5827222 1.3745457
## ME2_BM1 0.5387897 0.6670689
## ME2_BM2 0.5209447 0.9573097
## ME2_BM3 0.5100120 0.9869790
## ME2_BM4 0.5156962 1.0541577
## ME2_BM5 0.5886444 1.2567768
## ME3_BM1 0.5308739 0.7525312
## ME3_BM2 0.4791648 0.9231689
## ME3_BM3 0.4770094 0.9348139
## ME3_BM4 0.4991309 1.0248032
## ME3_BM5 0.5794921 1.1172081
## ME4_BM1 0.4621543 0.7635621
## ME4_BM2 0.4631926 0.8089417
## ME4_BM3 0.4682924 0.8660339
## ME4_BM4 0.4955658 0.9690417
## ME4_BM5 0.6012664 1.0536288
## BIG_LoBM 0.4071054 0.6872679
## ME5_BM2 0.4031932 0.6583507
## ME5_BM3 0.4126699 0.7236165
## ME5_BM4 0.4711381 0.6623302
## BIG_HiBM 0.5579382 0.9480490
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.1.2
ggplot(data = realized_vs_pred_return_1, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_1))) +
geom_point(size = 2) +
geom_label_repel(aes(fill = factor(Group)), color = 'white', fontface = 'bold', min.segment.length = 0, segment.color = 'black', show.legend = FALSE) +
ggtitle("Predicted Vs. Realized Excess Returns") +
theme(
plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11)
) +
geom_abline() +
scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.75)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 1.4)) +
labs(y = "Realized Returns", x = "Predicted Returns")
r_squared_1 <- as.data.frame(as.matrix(r_squared_1))
r_squared_1 <- r_squared_1[-c(1),]
r_squared_1 <- as.data.frame(as.list(r_squared_1), col.names = names(df_combined[2:26]), row.names = "R_Squared_CAPM")
names_2 <- c()
r_squared_2 <- c()
realized_return_2 <- c()
pred_return_2 <- c()
reg_2 <- c()
for (i in 2:26) {
reg_2[[i]] <- summary(lm(as.matrix(df_combined[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(SMB) + as.matrix(HML) + as.matrix(RF), data = df_combined))
names_2[[i]] <- colnames(df_combined[i])
print(ggplot(data = df_combined, aes(x = Date)) +
geom_line(aes(y = df_combined[ , i] - RF, color="Realized Returns")) +
geom_line(aes(y = as.vector(reg_2[[i]]$coefficients[2,1]) * MKT_RF_DIFF, color="Predicted Returns")) +
labs(y = "Excess Returns", colour = "Legend") +
ggtitle(names_2[[i]]) +
theme(
plot.title = element_text(size = 15, face = "bold", hjust = 0.5, vjust = 2.5),
axis.title.x = element_text(size = 12, face = "bold", margin = margin(t = 10)),
axis.title.y = element_text(size = 12, face = "bold", margin = margin(r = 10)),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11),
legend.title = element_text(size = 12, face = "bold"),
legend.text = element_text(size = 10.5),
legend.key.height = unit(1, 'cm'),
legend.key.width = unit(1.5, 'cm'),
legend.position = "right", legend.margin=margin(l = 30)) +
guides(color = guide_legend(override.aes = list(size = 1.5)))
)
pred_return_2[[i]] <- mean(reg_2[[i]]$coefficients[2,1] * df_combined$MKT_RF_DIFF)
realized_return_2[[i]] <- mean(df_combined[ , i] - df_combined$RF)
r_squared_2[[i]] <- reg_2[[i]]$adj.r.squared
}
pred_return_2 <- as.data.frame(as.matrix(pred_return_2))
pred_return_2 <- pred_return_2[-c(1),]
pred_return_2 <- as.data.frame(as.list(pred_return_2), col.names = names(df_combined[2:26]), row.names = "Predicted_Returns")
realized_return_2 <- as.data.frame(as.matrix(realized_return_2))
realized_return_2 <- realized_return_2[-c(1),]
realized_return_2 <- as.data.frame(as.list(realized_return_2), col.names = names(df_combined[2:26]), row.names = "Realized_Returns")
realized_vs_pred_return_2 <- rbind(pred_return_2, realized_return_2)
realized_vs_pred_return_2 <- as.data.frame(t(realized_vs_pred_return_2))
realized_vs_pred_return_2 <- realized_vs_pred_return_2 %>%
add_column(Group = c('A', 'B', 'B', 'B', 'A', 'C', 'C', 'C', 'C', 'C', 'D', 'D', 'D', 'D', 'D', 'E', 'E', 'E', 'E', 'E', 'A', 'F', 'F', 'F', 'A'), .after="Realized_Returns")
realized_vs_pred_return_2[,1:2]
## Predicted_Returns Realized_Returns
## SMALL_LoBM 0.5442463 0.6044773
## ME1_BM2 0.4598130 0.7300789
## ME1_BM3 0.4589947 1.0124432
## ME1_BM4 0.4014335 1.1812958
## SMALL_HiBM 0.4167590 1.3745457
## ME2_BM1 0.4637794 0.6670689
## ME2_BM2 0.4335062 0.9573097
## ME2_BM3 0.4207845 0.9869790
## ME2_BM4 0.4114393 1.0541577
## ME2_BM5 0.4556272 1.2567768
## ME3_BM1 0.4821273 0.7525312
## ME3_BM2 0.4339799 0.9231689
## ME3_BM3 0.4196771 0.9348139
## ME3_BM4 0.4231335 1.0248032
## ME3_BM5 0.4745503 1.1172081
## ME4_BM1 0.4598769 0.7635621
## ME4_BM2 0.4377957 0.8089417
## ME4_BM3 0.4285701 0.8660339
## ME4_BM4 0.4398941 0.9690417
## ME4_BM5 0.5130312 1.0536288
## BIG_LoBM 0.4372281 0.6872679
## ME5_BM2 0.4171068 0.6583507
## ME5_BM3 0.4091017 0.7236165
## ME5_BM4 0.4406138 0.6623302
## BIG_HiBM 0.5015446 0.9480490
ggplot(data = realized_vs_pred_return_2, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_2))) +
geom_point(size = 2) +
geom_label_repel(aes(fill = factor(Group)), color = 'white', fontface = 'bold', min.segment.length = 0, segment.color = 'black', show.legend = FALSE) +
ggtitle("Predicted Vs. Realized Excess Returns") +
theme(
plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11)
) +
geom_abline() +
scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.555)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 1.4)) +
labs(y = "Realized Returns", x = "Predicted Returns")
r_squared_2 <- as.data.frame(as.matrix(r_squared_2))
r_squared_2 <- r_squared_2[-c(1),]
r_squared_2 <- as.data.frame(as.list(r_squared_2), col.names = names(df_combined[2:26]), row.names = "R_Squared_Fama_French")
CAPM_vs_FF_R2 <- rbind(r_squared_1, r_squared_2)
CAPM_vs_FF_R2 <- as.data.frame(t(CAPM_vs_FF_R2))
CAPM_vs_FF_R2 <- CAPM_vs_FF_R2 %>%
mutate(Diff = R_Squared_Fama_French - R_Squared_CAPM)
CAPM_vs_FF_R2 <- cbind(Portfolio = rownames(CAPM_vs_FF_R2), CAPM_vs_FF_R2)
rownames(CAPM_vs_FF_R2) <- 1:nrow(CAPM_vs_FF_R2)
CAPM_vs_FF_R2
## Portfolio R_Squared_CAPM R_Squared_Fama_French Diff
## 1 SMALL_LoBM 0.5095481 0.6579181 0.14837000
## 2 ME1_BM2 0.5852910 0.8217644 0.23647337
## 3 ME1_BM3 0.6595627 0.8751185 0.21555582
## 4 ME1_BM4 0.6616252 0.9286335 0.26700830
## 5 SMALL_HiBM 0.6206036 0.9288200 0.30821645
## 6 ME2_BM1 0.7208177 0.9100205 0.18920279
## 7 ME2_BM2 0.7655697 0.9315921 0.16602234
## 8 ME2_BM3 0.7828256 0.9337152 0.15088955
## 9 ME2_BM4 0.7616347 0.9526927 0.19105806
## 10 ME2_BM5 0.7189660 0.9511521 0.23218610
## 11 ME3_BM1 0.8127497 0.9285372 0.11578748
## 12 ME3_BM2 0.8660324 0.9303425 0.06431012
## 13 ME3_BM3 0.8511719 0.9266235 0.07545166
## 14 ME3_BM4 0.8077029 0.9321766 0.12447369
## 15 ME3_BM5 0.7488635 0.9232252 0.17436167
## 16 ME4_BM1 0.8722148 0.9325506 0.06033586
## 17 ME4_BM2 0.9056843 0.9225659 0.01688166
## 18 ME4_BM3 0.8686237 0.9142044 0.04558063
## 19 ME4_BM4 0.8223125 0.9160716 0.09375907
## 20 ME4_BM5 0.7578851 0.9130054 0.15512034
## 21 BIG_LoBM 0.9171262 0.9548618 0.03773557
## 22 ME5_BM2 0.9193302 0.9310154 0.01168527
## 23 ME5_BM3 0.8505411 0.9048232 0.05428211
## 24 ME5_BM4 0.8016296 0.9229742 0.12134460
## 25 BIG_HiBM 0.6682738 0.8347205 0.16644664
print(paste0("The average R Squared over the 25 Portfolios is ", round(mean(CAPM_vs_FF_R2$R_Squared_CAPM), digits = 6), " under the CAPM Model"))
## [1] "The average R Squared over the 25 Portfolios is 0.770263 under the CAPM Model"
print('')
## [1] ""
print(paste0("The average R Squared over the 25 Portfolios is ", round(mean(CAPM_vs_FF_R2$R_Squared_Fama_French), digits = 6), " under the Fama French 3 Factor Model"))
## [1] "The average R Squared over the 25 Portfolios is 0.907165 under the Fama French 3 Factor Model"
We can see that for all 25 portfolios formed on book-to-market, the Fama French 3 Factor Model is more accurate than the CAPM model. For example, the Fama French Model accounts for over 30% more variation than the CAPM model does for the portfolio labelled “SMALL_Hi_BM”. The CAPM model is a good rudimentary and versatile model, but it only uses one factor to explain portfolio returns. Additionally, this factor is the difference between the index return and the risk-free rate. Thus, if a given portfolio has similar weights and holdings to the market index, the CAPM model will be able to price it well. However, if a given portfolio has wildly different weights and holdings when compared to the market index, the CAPM model will likely struggle to accurately price it. The plots of mean realized excess returns against the predicted excess returns further supports this conclusion. We can see that the plot corresponding to the Fama French Model does a better job of pricing the portfolios than the CAPM model.
library(reshape)
## Warning: package 'reshape' was built under R version 4.1.2
##
## Attaching package: 'reshape'
## The following object is masked from 'package:lubridate':
##
## stamp
## The following object is masked from 'package:dplyr':
##
## rename
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
CAPM_vs_FF_R2_plot_prep <- select(CAPM_vs_FF_R2, -c(Diff))
CAPM_vs_FF_R2_melt <- melt(CAPM_vs_FF_R2_plot_prep, "Portfolio")
ggplot(data = CAPM_vs_FF_R2_melt, aes(x = Portfolio, y = value)) +
geom_bar(aes(fill=variable), stat = "identity", position = position_dodge(width=0.4), width = 0.4) +
ggtitle("CAPM Vs Fama French") +
labs(y = "R_Squared") +
theme(
plot.title = element_text(size = 15, face = "bold", hjust = 0.5, vjust = 2.5),
axis.title.x = element_text(size = 12, face = "bold", margin = margin(t = 10)),
axis.title.y = element_text(size = 12, face = "bold", margin = margin(r = 10)),
axis.text.x = element_text(size = 11, angle=-40, hjust=0.1),
axis.text.y = element_text(size = 11),
legend.title = element_text(size = 12, face = "bold"),
legend.text = element_text(size = 10.5),
legend.key.height = unit(1, 'cm'),
legend.key.width = unit(1, 'cm'),
legend.position = "right", legend.margin=margin(l = 20))+
guides(fill=guide_legend(title="Legend"))
df_OP <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\Portfolios_Formed_on_OP.csv", header = TRUE, skip = 24)
df_OP <- df_OP[1:702,]
names(df_OP)[names(df_OP) == "X"] <- "Date"
df_OP <- as.data.frame(lapply(df_OP, as.numeric))
df_OP$Date <- parse_date_time(df_OP$Date, "ym")
df_FF_OP <- df_FF[445:1146,]
df_combined_OP <- left_join(df_OP, df_FF_OP, by = "Date")
df_combined_OP <- df_combined_OP %>%
mutate(MKT_RF_DIFF = Mkt_RF - RF)
df_combined_OP <- df_combined_OP[-c(2:9)]
names(df_combined_OP) <- gsub("\\.", "_", names(df_combined_OP))
head(df_combined_OP)
## Date Lo_10 Dec_2 Dec_3 Dec_4 Dec_5 Dec_6 Dec_7 Dec_8 Dec_9 Hi_10 Mkt_RF
## 1 1963-07-01 -1.44 -0.19 -1.02 2.19 -0.84 0.92 -0.72 -1.15 -0.31 0.98 -0.39
## 2 1963-08-01 5.53 4.45 6.00 3.55 5.62 5.11 5.57 5.36 5.90 6.07 5.07
## 3 1963-09-01 -3.02 -1.65 -1.15 -2.59 2.21 -1.88 -2.48 -1.34 -2.48 -0.97 -1.57
## 4 1963-10-01 0.77 0.85 0.97 -0.05 3.41 0.84 0.98 3.67 2.92 8.88 2.53
## 5 1963-11-01 0.18 -1.74 -1.81 -0.63 2.05 -0.63 -0.74 1.06 0.30 -3.73 -0.85
## 6 1963-12-01 1.86 0.03 3.47 4.30 0.14 3.34 2.68 2.53 2.60 1.35 1.83
## SMB HML RF MKT_RF_DIFF
## 1 -0.53 -0.89 0.27 -0.66
## 2 -0.89 1.68 0.25 4.82
## 3 -0.35 0.08 0.27 -1.84
## 4 -0.56 -0.14 0.29 2.24
## 5 -1.17 1.81 0.27 -1.12
## 6 -2.11 -0.08 0.29 1.54
r_squared_3 <- c()
realized_return_3 <- c()
pred_return_3 <- c()
reg_3 <- c()
for (i in 2:11) {
reg_3[[i]] <- summary(lm(as.matrix(df_combined_OP[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(SMB) + as.matrix(HML) + as.matrix(RF), data = df_combined_OP))
pred_return_3[[i]] <- mean(reg_3[[i]]$coefficients[2,1] * df_combined_OP$MKT_RF_DIFF)
realized_return_3[[i]] <- mean(df_combined_OP[ , i] - df_combined_OP$RF)
r_squared_3[[i]] <- reg_3[[i]]$adj.r.squared
}
pred_return_3 <- as.data.frame(as.matrix(pred_return_3))
pred_return_3 <- pred_return_3[-c(1),]
pred_return_3 <- as.data.frame(as.list(pred_return_3), col.names = names(df_combined_OP[2:11]), row.names = "Predicted_Returns")
realized_return_3 <- as.data.frame(as.matrix(realized_return_3))
realized_return_3 <- realized_return_3[-c(1),]
realized_return_3 <- as.data.frame(as.list(realized_return_3), col.names = names(df_combined_OP[2:11]), row.names = "Realized_Returns")
realized_vs_pred_return_3 <- rbind(pred_return_3, realized_return_3)
realized_vs_pred_return_3 <- as.data.frame(t(realized_vs_pred_return_3))
realized_vs_pred_return_3 <- realized_vs_pred_return_3 %>%
add_column(Group = c('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'), .after="Realized_Returns")
realized_vs_pred_return_3[,1:2]
## Predicted_Returns Realized_Returns
## Lo_10 0.2619437 0.4304843
## Dec_2 0.2373854 0.4591168
## Dec_3 0.2341771 0.4872222
## Dec_4 0.2196764 0.5520513
## Dec_5 0.2240728 0.6761823
## Dec_6 0.2235734 0.5732906
## Dec_7 0.2194115 0.5531339
## Dec_8 0.2228889 0.6916382
## Dec_9 0.2165977 0.7410399
## Hi_10 0.2174477 0.6822507
ggplot(data = realized_vs_pred_return_3, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_3))) +
geom_point(size = 2) +
geom_label_repel(aes(fill = factor(Group)), color = 'white', fontface = 'bold', min.segment.length = 0, segment.color = 'black', show.legend = FALSE) +
ggtitle("Predicted Vs. Realized Excess Returns") +
theme(
plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11)
) +
geom_abline() +
scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.30)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 0.8)) +
labs(y = "Realized Returns", x = "Predicted Returns")
r_squared_3 <- as.data.frame(as.matrix(r_squared_3))
r_squared_3 <- r_squared_3[-c(1),]
r_squared_3 <- as.data.frame(as.list(r_squared_3), col.names = names(df_combined_OP[2:11]), row.names = "R_Squared")
r_squared_3 <- as.data.frame(t(r_squared_3))
r_squared_3 <- cbind(Operating_Profitability_Portfolios = rownames(r_squared_3), r_squared_3)
rownames(r_squared_3) <- 1:nrow(r_squared_3)
r_squared_3
## Operating_Profitability_Portfolios R_Squared
## 1 Lo_10 0.8597687
## 2 Dec_2 0.8754843
## 3 Dec_3 0.8973700
## 4 Dec_4 0.8939074
## 5 Dec_5 0.8912305
## 6 Dec_6 0.9121761
## 7 Dec_7 0.9209898
## 8 Dec_8 0.9201417
## 9 Dec_9 0.9137266
## 10 Hi_10 0.8771008
print(paste0("The average R Squared over the 10 Portfolios is ", round(mean(r_squared_3$R_Squared), digits = 6), " under the Fama French 3 Factor Model"))
## [1] "The average R Squared over the 10 Portfolios is 0.89619 under the Fama French 3 Factor Model"
df_INV <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\Portfolios_Formed_on_INV.csv", header = TRUE, skip = 17)
df_INV <- df_INV[1:702,]
names(df_INV)[names(df_INV) == "X"] <- "Date"
df_INV <- as.data.frame(lapply(df_INV, as.numeric))
df_INV$Date <- parse_date_time(df_INV$Date, "ym")
df_combined_INV <- left_join(df_INV, df_FF_OP, by = "Date")
df_combined_INV <- df_combined_INV %>%
mutate(MKT_RF_DIFF = Mkt_RF - RF)
df_combined_INV <- df_combined_INV[-c(2:9)]
names(df_combined_INV) <- gsub("\\.", "_", names(df_combined_INV))
head(df_combined_INV)
## Date Lo_10 Dec_2 Dec_3 Dec_4 Dec_5 Dec_6 Dec_7 Dec_8 Dec_9 Hi_10 Mkt_RF
## 1 1963-07-01 -2.34 -0.69 -0.12 -0.10 1.04 0.25 0.25 0.30 0.49 0.73 -0.39
## 2 1963-08-01 5.40 6.90 5.34 5.36 5.64 5.30 4.54 4.51 6.05 7.72 5.07
## 3 1963-09-01 -0.83 -0.37 -3.21 -0.32 -2.76 -2.18 -0.26 -0.80 -1.15 -1.85 -1.57
## 4 1963-10-01 2.91 2.37 1.41 0.66 0.95 1.61 2.41 4.41 8.41 4.38 2.53
## 5 1963-11-01 1.85 -0.62 -0.40 -1.53 -1.80 -1.11 1.77 -0.40 -4.39 -0.64 -0.85
## 6 1963-12-01 2.19 2.26 1.83 3.82 1.84 2.38 1.43 3.86 1.13 1.06 1.83
## SMB HML RF MKT_RF_DIFF
## 1 -0.53 -0.89 0.27 -0.66
## 2 -0.89 1.68 0.25 4.82
## 3 -0.35 0.08 0.27 -1.84
## 4 -0.56 -0.14 0.29 2.24
## 5 -1.17 1.81 0.27 -1.12
## 6 -2.11 -0.08 0.29 1.54
r_squared_4 <- c()
realized_return_4 <- c()
pred_return_4 <- c()
reg_4 <- c()
for (i in 2:11) {
reg_4[[i]] <- summary(lm(as.matrix(df_combined_INV[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(SMB) + as.matrix(HML) + as.matrix(RF), data = df_combined_INV))
pred_return_4[[i]] <- mean(reg_4[[i]]$coefficients[2,1] * df_combined_INV$MKT_RF_DIFF)
realized_return_4[[i]] <- mean(df_combined_INV[ , i] - df_combined_INV$RF)
r_squared_4[[i]] <- reg_4[[i]]$adj.r.squared
}
pred_return_4 <- as.data.frame(as.matrix(pred_return_4))
pred_return_4 <- pred_return_4[-c(1),]
pred_return_4 <- as.data.frame(as.list(pred_return_4), col.names = names(df_combined_INV[2:11]), row.names = "Predicted_Returns")
realized_return_4 <- as.data.frame(as.matrix(realized_return_4))
realized_return_4 <- realized_return_4[-c(1),]
realized_return_4 <- as.data.frame(as.list(realized_return_4), col.names = names(df_combined_INV[2:11]), row.names = "Realized_Returns")
realized_vs_pred_return_4 <- rbind(pred_return_4, realized_return_4)
realized_vs_pred_return_4 <- as.data.frame(t(realized_vs_pred_return_4))
realized_vs_pred_return_4 <- realized_vs_pred_return_4 %>%
add_column(Group = c('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'), .after="Realized_Returns")
realized_vs_pred_return_4[,1:2]
## Predicted_Returns Realized_Returns
## Lo_10 0.2322115 0.7944017
## Dec_2 0.2212424 0.8185897
## Dec_3 0.2101821 0.7330057
## Dec_4 0.2057983 0.6099003
## Dec_5 0.2061364 0.6184330
## Dec_6 0.2154027 0.5964387
## Dec_7 0.2162181 0.6116952
## Dec_8 0.2261915 0.6499145
## Dec_9 0.2413185 0.6731909
## Hi_10 0.2649270 0.4253134
ggplot(data = realized_vs_pred_return_4, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_4))) +
geom_point(size = 2) +
geom_label_repel(aes(fill = factor(Group)), color = 'white', fontface = 'bold', min.segment.length = 0, segment.color = 'black', show.legend = FALSE) +
ggtitle("Predicted Vs. Realized Excess Returns") +
theme(
plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11)
) +
geom_abline() +
scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.30)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 0.9)) +
labs(y = "Realized Returns", x = "Predicted Returns")
r_squared_4 <- as.data.frame(as.matrix(r_squared_4))
r_squared_4 <- r_squared_4[-c(1),]
r_squared_4 <- as.data.frame(as.list(r_squared_4), col.names = names(df_combined_INV[2:11]), row.names = "R_Squared")
r_squared_4 <- as.data.frame(t(r_squared_4))
r_squared_4 <- cbind(Investment_Portfolios = rownames(r_squared_4), r_squared_4)
rownames(r_squared_4) <- 1:nrow(r_squared_4)
r_squared_4
## Investment_Portfolios R_Squared
## 1 Lo_10 0.8606610
## 2 Dec_2 0.8578449
## 3 Dec_3 0.8650878
## 4 Dec_4 0.8870272
## 5 Dec_5 0.9013214
## 6 Dec_6 0.9109013
## 7 Dec_7 0.9018029
## 8 Dec_8 0.9171260
## 9 Dec_9 0.9028660
## 10 Hi_10 0.9253423
print(paste0("The average R Squared over the 10 Portfolios is ", round(mean(r_squared_4$R_Squared), digits = 6), " under the Fama French 3 Factor Model"))
## [1] "The average R Squared over the 10 Portfolios is 0.892998 under the Fama French 3 Factor Model"
df_DY <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\Portfolios_Formed_on_D-P.csv", header = TRUE, skip = 19)
df_DY <- df_DY[1:1134,]
names(df_DY)[names(df_DY) == "X"] <- "Date"
df_DY <- as.data.frame(lapply(df_DY, as.numeric))
df_DY$Date <- parse_date_time(df_DY$Date, "ym")
df_FF_DY <- df_FF[13:1146,]
df_combined_DY <- left_join(df_DY, df_FF_DY, by = "Date")
df_combined_DY <- df_combined_DY %>%
mutate(MKT_RF_DIFF = Mkt_RF - RF)
df_combined_DY <- df_combined_DY[-c(2:10)]
names(df_combined_DY) <- gsub("\\.", "_", names(df_combined_DY))
head(df_combined_DY)
## Date Lo_10 Dec_2 Dec_3 Dec_4 Dec_5 Dec_6 Dec_7 Dec_8 Dec_9 Hi_10 Mkt_RF
## 1 1927-07-01 11.50 6.00 7.68 8.79 5.56 9.26 12.38 6.88 5.65 4.02 7.26
## 2 1927-08-01 5.44 3.40 0.88 2.59 -0.12 2.21 5.83 2.44 3.30 -2.31 1.97
## 3 1927-09-01 1.84 6.61 4.01 5.20 4.47 5.72 5.67 3.88 3.13 3.50 4.76
## 4 1927-10-01 -6.51 -3.75 -5.24 -5.62 -2.12 -2.88 -3.47 -3.27 -0.98 -4.13 -4.31
## 5 1927-11-01 6.98 6.59 6.92 9.68 4.18 7.01 5.48 7.67 8.14 11.18 6.58
## 6 1927-12-01 2.05 1.40 1.67 2.85 0.76 0.06 6.30 5.21 8.68 0.55 2.09
## SMB HML RF MKT_RF_DIFF
## 1 -3.25 -1.14 0.30 6.96
## 2 -0.69 -3.74 0.28 1.69
## 3 -3.63 -0.63 0.21 4.55
## 4 2.12 -4.33 0.25 -4.56
## 5 2.72 -0.27 0.21 6.37
## 6 0.97 -1.13 0.22 1.87
r_squared_5 <- c()
realized_return_5 <- c()
pred_return_5 <- c()
reg_5 <- c()
for (i in 2:11) {
reg_5[[i]] <- summary(lm(as.matrix(df_combined_DY[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(SMB) + as.matrix(HML) + as.matrix(RF), data = df_combined_DY))
pred_return_5[[i]] <- mean(reg_5[[i]]$coefficients[2,1] * df_combined_DY$MKT_RF_DIFF)
realized_return_5[[i]] <- mean(df_combined_DY[ , i] - df_combined_DY$RF)
r_squared_5[[i]] <- reg_5[[i]]$adj.r.squared
}
pred_return_5 <- as.data.frame(as.matrix(pred_return_5))
pred_return_5 <- pred_return_5[-c(1),]
pred_return_5 <- as.data.frame(as.list(pred_return_5), col.names = names(df_combined_DY[2:11]), row.names = "Predicted_Returns")
realized_return_5 <- as.data.frame(as.matrix(realized_return_5))
realized_return_5 <- realized_return_5[-c(1),]
realized_return_5 <- as.data.frame(as.list(realized_return_5), col.names = names(df_combined_DY[2:11]), row.names = "Realized_Returns")
realized_vs_pred_return_5 <- rbind(pred_return_5, realized_return_5)
realized_vs_pred_return_5 <- as.data.frame(t(realized_vs_pred_return_5))
realized_vs_pred_return_5 <- realized_vs_pred_return_5 %>%
add_column(Group = c('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'), .after="Realized_Returns")
realized_vs_pred_return_5[,1:2]
## Predicted_Returns Realized_Returns
## Lo_10 0.4789129 0.6484568
## Dec_2 0.4363415 0.7116578
## Dec_3 0.4216964 0.6883157
## Dec_4 0.4020086 0.7709788
## Dec_5 0.4089952 0.6183598
## Dec_6 0.3920431 0.7398060
## Dec_7 0.3837628 0.7935802
## Dec_8 0.3747815 0.8046825
## Dec_9 0.3793918 0.7846825
## Hi_10 0.3621550 0.7834656
ggplot(data = realized_vs_pred_return_5, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_5))) +
geom_point(size = 2) +
geom_label_repel(aes(fill = factor(Group)), color = 'white', fontface = 'bold', min.segment.length = 0, segment.color = 'black', show.legend = FALSE) +
ggtitle("Predicted Vs. Realized Excess Returns") +
theme(
plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11)
) +
geom_abline() +
scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.55)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 0.9)) +
labs(y = "Realized Returns", x = "Predicted Returns")
r_squared_5 <- as.data.frame(as.matrix(r_squared_5))
r_squared_5 <- r_squared_5[-c(1),]
r_squared_5 <- as.data.frame(as.list(r_squared_5), col.names = names(df_combined_DY[2:11]), row.names = "R_Squared")
r_squared_5 <- as.data.frame(t(r_squared_5))
r_squared_5 <- cbind(Dividend_Yield_Portfolios = rownames(r_squared_5), r_squared_5)
rownames(r_squared_5) <- 1:nrow(r_squared_5)
r_squared_5
## Dividend_Yield_Portfolios R_Squared
## 1 Lo_10 0.8817372
## 2 Dec_2 0.9036159
## 3 Dec_3 0.8886744
## 4 Dec_4 0.8954145
## 5 Dec_5 0.8731314
## 6 Dec_6 0.8661112
## 7 Dec_7 0.8620661
## 8 Dec_8 0.8516656
## 9 Dec_9 0.8290817
## 10 Hi_10 0.7978232
print(paste0("The average R Squared over the 10 Portfolios is ", round(mean(r_squared_5$R_Squared), digits = 6), " under the Fama French 3 Factor Model"))
## [1] "The average R Squared over the 10 Portfolios is 0.864932 under the Fama French 3 Factor Model"
df_MM <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\10_Portfolios_Prior_12_2.csv", header = TRUE, skip = 10)
df_MM <- df_MM[1:1140,]
names(df_MM)[names(df_MM) == "X"] <- "Date"
df_MM <- as.data.frame(lapply(df_MM, as.numeric))
df_MM$Date <- parse_date_time(df_MM$Date, "ym")
df_FF_MM <- df_FF[7:1146,]
df_combined_MM <- left_join(df_MM, df_FF_MM, by = "Date")
df_combined_MM <- df_combined_MM %>%
mutate(MKT_RF_DIFF = Mkt_RF - RF)
names(df_combined_MM) <- gsub("\\.", "_", names(df_combined_MM))
head(df_combined_MM)
## Date Lo_PRIOR PRIOR_2 PRIOR_3 PRIOR_4 PRIOR_5 PRIOR_6 PRIOR_7 PRIOR_8
## 1 1927-01-01 -3.32 -4.46 2.69 -0.33 -0.41 0.97 0.74 0.36
## 2 1927-02-01 7.53 5.93 8.25 7.27 3.22 4.21 2.83 3.26
## 3 1927-03-01 -3.23 -3.01 -3.92 -4.77 -0.48 -2.42 2.05 0.41
## 4 1927-04-01 2.04 -3.20 -2.47 -1.37 2.20 -0.05 2.05 -0.59
## 5 1927-05-01 2.72 4.57 5.95 3.18 6.36 5.80 4.96 6.81
## 6 1927-06-01 -6.44 -3.51 -3.13 -2.62 -2.10 -1.91 -1.92 -1.64
## PRIOR_9 Hi_PRIOR Mkt_RF SMB HML RF MKT_RF_DIFF
## 1 -0.41 -0.24 -0.06 -0.51 4.73 0.25 -0.31
## 2 4.20 7.01 4.18 -0.25 3.27 0.26 3.92
## 3 0.96 5.47 0.13 -1.89 -2.56 0.30 -0.17
## 4 1.59 5.49 0.46 0.49 0.71 0.25 0.21
## 5 8.11 6.32 5.44 1.46 4.98 0.30 5.14
## 6 -2.37 -1.58 -2.34 0.51 -1.53 0.26 -2.60
r_squared_6 <- c()
realized_return_6 <- c()
pred_return_6 <- c()
reg_6 <- c()
for (i in 2:11) {
reg_6[[i]] <- summary(lm(as.matrix(df_combined_MM[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(SMB) + as.matrix(HML) + as.matrix(RF), data = df_combined_MM))
pred_return_6[[i]] <- mean(reg_6[[i]]$coefficients[2,1] * df_combined_MM$MKT_RF_DIFF)
realized_return_6[[i]] <- mean(df_combined_MM[ , i] - df_combined_MM$RF)
r_squared_6[[i]] <- reg_6[[i]]$adj.r.squared
}
pred_return_6 <- as.data.frame(as.matrix(pred_return_6))
pred_return_6 <- pred_return_6[-c(1),]
pred_return_6 <- as.data.frame(as.list(pred_return_6), col.names = names(df_combined_MM[2:11]), row.names = "Predicted_Returns")
realized_return_6 <- as.data.frame(as.matrix(realized_return_6))
realized_return_6 <- realized_return_6[-c(1),]
realized_return_6 <- as.data.frame(as.list(realized_return_6), col.names = names(df_combined_MM[2:11]), row.names = "Realized_Returns")
realized_vs_pred_return_6 <- rbind(pred_return_6, realized_return_6)
realized_vs_pred_return_6 <- as.data.frame(t(realized_vs_pred_return_6))
realized_vs_pred_return_6 <- realized_vs_pred_return_6 %>%
add_column(Group = c('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'), .after="Realized_Returns")
realized_vs_pred_return_6[,1:2]
## Predicted_Returns Realized_Returns
## Lo_PRIOR 0.5913294 0.1015000
## PRIOR_2 0.5291004 0.4910965
## PRIOR_3 0.4742153 0.5092544
## PRIOR_4 0.4497942 0.6647719
## PRIOR_5 0.4271412 0.6474298
## PRIOR_6 0.4287309 0.7110789
## PRIOR_7 0.4129910 0.7688860
## PRIOR_8 0.3990540 0.8615088
## PRIOR_9 0.4125182 0.9400789
## Hi_PRIOR 0.4293843 1.2398596
ggplot(data = realized_vs_pred_return_6, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_6))) +
geom_point(size = 2) +
geom_label_repel(aes(fill = factor(Group)), color = 'white', fontface = 'bold', min.segment.length = 0, segment.color = 'black', show.legend = FALSE) +
ggtitle("Predicted Vs. Realized Excess Returns") +
theme(
plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11)
) +
geom_abline() +
scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.6)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 1.3)) +
labs(y = "Realized Returns", x = "Predicted Returns")
r_squared_6 <- as.data.frame(as.matrix(r_squared_6))
r_squared_6 <- r_squared_6[-c(1),]
r_squared_6 <- as.data.frame(as.list(r_squared_6), col.names = names(df_combined_MM[2:11]), row.names = "R_Squared")
r_squared_6 <- as.data.frame(t(r_squared_6))
r_squared_6 <- cbind(Momentum_Portfolios = rownames(r_squared_6), r_squared_6)
rownames(r_squared_6) <- 1:nrow(r_squared_6)
r_squared_6
## Momentum_Portfolios R_Squared
## 1 Lo_PRIOR 0.7735194
## 2 PRIOR_2 0.8177860
## 3 PRIOR_3 0.8444302
## 4 PRIOR_4 0.8743757
## 5 PRIOR_5 0.8961707
## 6 PRIOR_6 0.9207952
## 7 PRIOR_7 0.9010999
## 8 PRIOR_8 0.8821015
## 9 PRIOR_9 0.8522983
## 10 Hi_PRIOR 0.7627175
print(paste0("The average R Squared over the 10 Portfolios is ", round(mean(r_squared_6$R_Squared), digits = 6), " under the Fama French 3 Factor Model"))
## [1] "The average R Squared over the 10 Portfolios is 0.852529 under the Fama French 3 Factor Model"
df_17 <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\17_Industry_Portfolios.csv", header = TRUE, skip = 11)
df_17 <- df_17[1:1146,]
names(df_17)[names(df_17) == "X"] <- "Date"
df_17 <- as.data.frame(lapply(df_17, as.numeric))
df_17$Date <- parse_date_time(df_17$Date, "ym")
df_FF_17 <- df_FF[1:1146,]
df_combined_17 <- left_join(df_17, df_FF_17, by = "Date")
df_combined_17 <- df_combined_17 %>%
mutate(MKT_RF_DIFF = Mkt_RF - RF)
head(df_combined_17)
## Date Food Mines Oil Clths Durbl Chems Cnsum Cnstr Steel FabPr Machn
## 1 1926-07-01 0.48 3.78 -1.41 6.02 -1.62 8.46 1.42 2.31 4.07 8.77 3.79
## 2 1926-08-01 2.91 0.69 3.60 0.15 -1.96 5.70 5.84 4.33 2.17 -5.56 2.35
## 3 1926-09-01 1.20 1.10 -3.68 0.26 0.24 5.48 1.21 -0.06 0.15 -4.13 -0.65
## 4 1926-10-01 -3.06 -0.79 -1.02 0.37 -6.07 -4.76 0.69 -4.79 -3.85 -5.13 -3.29
## 5 1926-11-01 6.37 4.38 -0.01 2.22 -1.95 5.27 4.63 2.45 3.86 3.57 4.54
## 6 1926-12-01 -0.57 1.66 2.92 2.17 3.79 5.20 2.27 4.30 3.75 -3.85 1.29
## Cars Trans Utils Rtail Finan Other Mkt_RF SMB HML RF MKT_RF_DIFF
## 1 17.43 1.83 7.04 0.13 -0.02 1.22 2.96 -2.38 -2.73 0.22 2.74
## 2 3.96 4.54 -1.69 -0.68 4.47 3.11 2.64 -1.47 4.14 0.25 2.39
## 3 5.57 0.32 2.04 0.21 -1.61 1.82 0.36 -1.39 0.12 0.23 0.13
## 4 -8.29 -2.90 -2.63 -2.26 -5.51 -0.88 -3.24 -0.13 0.65 0.32 -3.56
## 5 -0.28 2.19 3.71 6.44 2.34 1.38 2.53 -0.16 -0.38 0.31 2.22
## 6 10.80 3.56 -0.17 0.61 2.60 1.82 2.62 -0.02 0.00 0.28 2.34
r_squared_7 <- c()
realized_return_7 <- c()
pred_return_7 <- c()
reg_7 <- c()
for (i in 2:18) {
reg_7[[i]] <- summary(lm(as.matrix(df_combined_17[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(SMB) + as.matrix(HML) + as.matrix(RF), data = df_combined_17))
pred_return_7[[i]] <- mean(reg_7[[i]]$coefficients[2,1] * df_combined_17$MKT_RF_DIFF)
realized_return_7[[i]] <- mean(df_combined_17[ , i] - df_combined_17$RF)
r_squared_7[[i]] <- reg_7[[i]]$adj.r.squared
}
pred_return_7 <- as.data.frame(as.matrix(pred_return_7))
pred_return_7 <- pred_return_7[-c(1),]
pred_return_7 <- as.data.frame(as.list(pred_return_7), col.names = names(df_combined_17[2:18]), row.names = "Predicted_Returns")
realized_return_7 <- as.data.frame(as.matrix(realized_return_7))
realized_return_7 <- realized_return_7[-c(1),]
realized_return_7 <- as.data.frame(as.list(realized_return_7), col.names = names(df_combined_17[2:18]), row.names = "Realized_Returns")
realized_vs_pred_return_7 <- rbind(pred_return_7, realized_return_7)
realized_vs_pred_return_7 <- as.data.frame(t(realized_vs_pred_return_7))
realized_vs_pred_return_7
## Predicted_Returns Realized_Returns
## Food 0.3276799 0.7159773
## Mines 0.3789590 0.7025567
## Oil 0.3774229 0.7406545
## Clths 0.3531473 0.7065532
## Durbl 0.4877504 0.6821990
## Chems 0.4548016 0.7823822
## Cnsum 0.3329988 0.7650262
## Cnstr 0.4760653 0.7816754
## Steel 0.5339467 0.6712042
## FabPr 0.4029743 0.7143019
## Machn 0.5184376 0.8710558
## Cars 0.5067694 0.9632286
## Trans 0.4446450 0.7072949
## Utils 0.3222166 0.6166579
## Rtail 0.4113723 0.7808115
## Finan 0.4788713 0.7584817
## Other 0.3863271 0.6735166
ggplot(data = realized_vs_pred_return_7, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_7))) +
geom_point(size = 2) +
geom_label_repel(fontface = 'bold', min.segment.length = 0, segment.color = 'black') +
ggtitle("Predicted Vs. Realized Excess Returns") +
theme(
plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11)
) +
geom_abline() +
scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.55)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 1.1)) +
labs(y = "Realized Returns", x = "Predicted Returns")
r_squared_7 <- as.data.frame(as.matrix(r_squared_7))
r_squared_7 <- r_squared_7[-c(1),]
r_squared_7 <- as.data.frame(as.list(r_squared_7), col.names = names(df_combined_17[2:18]), row.names = "R_Squared")
r_squared_7 <- as.data.frame(t(r_squared_7))
r_squared_7 <- cbind(Industry_Portfolios = rownames(r_squared_7), r_squared_7)
rownames(r_squared_7) <- 1:nrow(r_squared_7)
r_squared_7
## Industry_Portfolios R_Squared
## 1 Food 0.7220694
## 2 Mines 0.5389444
## 3 Oil 0.5837083
## 4 Clths 0.6739104
## 5 Durbl 0.8053966
## 6 Chems 0.7706768
## 7 Cnsum 0.6654104
## 8 Cnstr 0.8403978
## 9 Steel 0.7567506
## 10 FabPr 0.7423396
## 11 Machn 0.8598135
## 12 Cars 0.6958938
## 13 Trans 0.8272660
## 14 Utils 0.5893746
## 15 Rtail 0.7541838
## 16 Finan 0.8643262
## 17 Other 0.8993971
print(paste0("The average R Squared over the 17 Industry Portfolios is ", round(mean(r_squared_7$R_Squared), digits = 6), " under the Fama French 3 Factor Model"))
## [1] "The average R Squared over the 17 Industry Portfolios is 0.74058 under the Fama French 3 Factor Model"
df_30 <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\30_Industry_Portfolios.csv", header = TRUE, skip = 11)
df_30 <- df_30[1:1146,]
names(df_30)[names(df_30) == "X"] <- "Date"
df_30 <- as.data.frame(lapply(df_30, as.numeric))
df_30$Date <- parse_date_time(df_30$Date, "ym")
df_FF_30 <- df_FF[1:1146,]
df_combined_30 <- left_join(df_30, df_FF_30, by = "Date")
df_combined_30 <- df_combined_30 %>%
mutate(MKT_RF_DIFF = Mkt_RF - RF)
head(df_combined_30)
## Date Food Beer Smoke Games Books Hshld Clths Hlth Chems Txtls Cnstr
## 1 1926-07-01 0.56 -5.19 1.29 2.93 10.97 -0.48 8.08 1.77 8.14 0.39 2.07
## 2 1926-08-01 2.59 27.03 6.50 0.55 10.01 -3.58 -2.51 4.25 5.50 7.97 4.72
## 3 1926-09-01 1.16 4.02 1.26 6.58 -0.99 0.73 -0.51 0.69 5.33 2.30 -0.50
## 4 1926-10-01 -3.06 -3.31 1.06 -4.76 9.47 -4.68 0.12 -0.57 -4.76 1.00 -4.55
## 5 1926-11-01 6.35 7.29 4.55 1.66 -5.80 -0.54 1.87 5.42 5.20 3.10 2.20
## 6 1926-12-01 -0.51 -4.09 2.55 2.17 0.53 2.56 0.60 0.11 5.37 6.39 3.52
## Steel FabPr ElcEq Autos Carry Mines Coal Oil Util Telcm Servs BusEq Paper
## 1 4.07 5.43 3.18 16.39 1.02 5.64 1.54 -1.40 7.04 0.83 9.22 2.06 7.70
## 2 2.17 2.06 2.10 4.23 1.66 0.55 0.85 3.69 -1.69 2.17 2.02 4.39 -2.38
## 3 0.15 0.36 -0.56 4.83 2.73 1.74 0.30 -3.69 2.04 2.41 2.25 0.19 -5.54
## 4 -3.85 1.11 -5.73 -7.93 -5.56 -3.20 2.23 -1.04 -2.63 -0.11 -2.00 -1.09 -5.08
## 5 3.86 3.18 5.08 -0.66 7.87 8.46 -0.48 0.06 3.71 1.63 3.77 3.64 3.84
## 6 3.75 5.02 -2.47 10.49 2.48 1.83 1.43 2.94 -0.17 1.99 6.21 7.24 -4.63
## Trans Whlsl Rtail Meals Fin Other Mkt_RF SMB HML RF MKT_RF_DIFF
## 1 1.91 -23.79 0.07 1.87 -0.02 5.20 2.96 -2.38 -2.73 0.22 2.74
## 2 4.85 5.39 -0.75 -0.13 4.47 6.76 2.64 -1.47 4.14 0.25 2.39
## 3 0.07 -7.87 0.25 -0.56 -1.61 -3.86 0.36 -1.39 0.12 0.23 0.13
## 4 -2.61 -15.38 -2.20 -4.11 -5.51 -8.49 -3.24 -0.13 0.65 0.32 -3.56
## 5 1.61 4.67 6.52 4.33 2.34 4.00 2.53 -0.16 -0.38 0.31 2.22
## 6 3.68 9.65 0.57 1.51 2.60 -2.34 2.62 -0.02 0.00 0.28 2.34
r_squared_8 <- c()
realized_return_8 <- c()
pred_return_8 <- c()
reg_8 <- c()
for (i in 2:31) {
reg_8[[i]] <- summary(lm(as.matrix(df_combined_30[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(SMB) + as.matrix(HML) + as.matrix(RF), data = df_combined_30))
pred_return_8[[i]] <- mean(reg_8[[i]]$coefficients[2,1] * df_combined_30$MKT_RF_DIFF)
realized_return_8[[i]] <- mean(df_combined_30[ , i] - df_combined_30$RF)
r_squared_8[[i]] <- reg_8[[i]]$adj.r.squared
}
pred_return_8 <- as.data.frame(as.matrix(pred_return_8))
pred_return_8 <- pred_return_8[-c(1),]
pred_return_8 <- as.data.frame(as.list(pred_return_8), col.names = names(df_combined_30[2:31]), row.names = "Predicted_Returns")
realized_return_8 <- as.data.frame(as.matrix(realized_return_8))
realized_return_8 <- realized_return_8[-c(1),]
realized_return_8 <- as.data.frame(as.list(realized_return_8), col.names = names(df_combined_30[2:31]), row.names = "Realized_Returns")
realized_vs_pred_return_8 <- rbind(pred_return_8, realized_return_8)
realized_vs_pred_return_8 <- as.data.frame(t(realized_vs_pred_return_8))
realized_vs_pred_return_8
## Predicted_Returns Realized_Returns
## Food 0.3222931 0.7000611
## Beer 0.3719956 0.9355323
## Smoke 0.2813273 0.8705585
## Games 0.5448740 0.8883944
## Books 0.4313819 0.6579930
## Hshld 0.3887106 0.6785079
## Clths 0.3220425 0.7021030
## Hlth 0.3762053 0.8270593
## Chems 0.4548752 0.7954014
## Txtls 0.4186150 0.7131326
## Cnstr 0.4752720 0.7184293
## Steel 0.5339699 0.6712042
## FabPr 0.4976994 0.8250698
## ElcEq 0.5505322 0.9230279
## Autos 0.5232395 0.9452182
## Carry 0.4694880 0.8582286
## Mines 0.3561400 0.6663700
## Coal 0.4681152 0.7653403
## Oil 0.3764917 0.7382112
## Util 0.3222166 0.6166579
## Telcm 0.2976187 0.5851134
## Servs 0.3541885 0.9882810
## BusEq 0.4765955 0.9277138
## Paper 0.4063406 0.7433857
## Trans 0.4403819 0.6750349
## Whlsl 0.4166664 0.5800000
## Rtail 0.4157324 0.8014834
## Meals 0.3815996 0.8206370
## Fin 0.4788226 0.7587435
## Other 0.4261006 0.5303229
ggplot(data = realized_vs_pred_return_8, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_8))) +
geom_point(size = 2) +
geom_label_repel(fontface = 'bold', min.segment.length = 0, segment.color = 'black') +
ggtitle("Predicted Vs. Realized Excess Returns") +
theme(
plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11)
) +
geom_abline() +
scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.6)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 1.1)) +
labs(y = "Realized Returns", x = "Predicted Returns")
r_squared_8 <- as.data.frame(as.matrix(r_squared_8))
r_squared_8 <- r_squared_8[-c(1),]
r_squared_8 <- as.data.frame(as.list(r_squared_8), col.names = names(df_combined_30[2:31]), row.names = "R_Squared")
r_squared_8 <- as.data.frame(t(r_squared_8))
r_squared_8 <- cbind(Industry_Portfolios = rownames(r_squared_8), r_squared_8)
rownames(r_squared_8) <- 1:nrow(r_squared_8)
r_squared_8
## Industry_Portfolios R_Squared
## 1 Food 0.6996798
## 2 Beer 0.4942521
## 3 Smoke 0.3492252
## 4 Games 0.7080587
## 5 Books 0.7138365
## 6 Hshld 0.6776486
## 7 Clths 0.5569650
## 8 Hlth 0.6645048
## 9 Chems 0.7808480
## 10 Txtls 0.6945682
## 11 Cnstr 0.8403758
## 12 Steel 0.7568121
## 13 FabPr 0.8529388
## 14 ElcEq 0.8142165
## 15 Autos 0.6866316
## 16 Carry 0.7200590
## 17 Mines 0.4574915
## 18 Coal 0.4573826
## 19 Oil 0.5860283
## 20 Util 0.5893746
## 21 Telcm 0.6038135
## 22 Servs 0.3399916
## 23 BusEq 0.7803378
## 24 Paper 0.7482788
## 25 Trans 0.7942989
## 26 Whlsl 0.6885572
## 27 Rtail 0.7491477
## 28 Meals 0.6199605
## 29 Fin 0.8643518
## 30 Other 0.7145639
print(paste0("The average R Squared over the 30 Industry Portfolios is ", round(mean(r_squared_8$R_Squared), digits = 6), " under the Fama French 3 Factor Model"))
## [1] "The average R Squared over the 30 Industry Portfolios is 0.666807 under the Fama French 3 Factor Model"
df_49 <- read.csv("C:\\Users\\zerva\\Documents\\AM14\\49_Industry_Portfolios.csv", header = TRUE, skip = 11)
df_49 <- df_49[1:1146,]
names(df_49)[names(df_49) == "X"] <- "Date"
df_49 <- as.data.frame(lapply(df_49, as.numeric))
df_49$Date <- parse_date_time(df_49$Date, "ym")
df_FF_49 <- df_FF[1:1146,]
df_combined_49 <- left_join(df_49, df_FF_49, by = "Date")
df_combined_49 <- df_combined_49 %>%
mutate(MKT_RF_DIFF = Mkt_RF - RF)
head(df_combined_49)
## Date Agric Food Soda Beer Smoke Toys Fun Books Hshld Clths
## 1 1926-07-01 2.37 0.12 -99.99 -5.19 1.29 8.65 2.50 50.21 -0.48 8.08
## 2 1926-08-01 2.23 2.68 -99.99 27.03 6.50 16.81 -0.76 42.98 -3.58 -2.51
## 3 1926-09-01 -0.57 1.58 -99.99 4.02 1.26 8.33 6.42 -4.91 0.73 -0.51
## 4 1926-10-01 -0.46 -3.68 -99.99 -3.31 1.06 -1.40 -5.09 5.37 -4.68 0.12
## 5 1926-11-01 6.75 6.26 -99.99 7.29 4.55 0.00 1.82 -6.40 -0.54 1.87
## 6 1926-12-01 -3.27 0.18 -99.99 -4.09 2.55 2.48 2.14 -3.29 2.56 0.60
## Hlth MedEq Drugs Chems Rubbr Txtls BldMt Cnstr Steel FabPr Mach ElcEq
## 1 -99.99 4.95 0.91 8.14 -99.99 0.39 2.46 -8.21 4.07 -99.99 5.43 3.18
## 2 -99.99 4.20 4.26 5.50 -99.99 7.97 4.63 7.37 2.17 -99.99 2.06 2.10
## 3 -99.99 4.02 -0.26 5.33 -99.99 2.30 -0.11 -11.76 0.15 -99.99 0.36 -0.56
## 4 -99.99 3.47 -1.76 -4.76 -99.99 1.00 -4.42 -8.59 -3.85 -99.99 1.11 -5.73
## 5 -99.99 4.33 5.76 5.20 -99.99 3.10 2.20 2.25 3.86 -99.99 3.18 5.08
## 6 -99.99 -3.28 1.15 5.37 -99.99 6.39 3.60 0.76 3.75 -99.99 5.02 -2.47
## Autos Aero Ships Guns Gold Mines Coal Oil Util Telcm PerSv BusSv
## 1 16.39 -0.65 1.05 -99.99 -99.99 5.64 1.54 -1.40 7.04 0.83 -99.99 5.85
## 2 4.23 -6.58 1.80 -99.99 -99.99 0.55 0.85 3.69 -1.69 2.17 -99.99 1.51
## 3 4.83 -7.09 2.88 -99.99 -99.99 1.74 0.30 -3.69 2.04 2.41 -99.99 0.81
## 4 -7.93 -13.36 -5.45 -99.99 -99.99 -3.20 2.23 -1.04 -2.63 -0.11 -99.99 1.81
## 5 -0.66 6.61 7.89 -99.99 -99.99 8.46 -0.48 0.06 3.71 1.63 -99.99 0.78
## 6 10.49 9.17 2.39 -99.99 -99.99 1.83 1.43 2.94 -0.17 1.99 -99.99 5.16
## Hardw Softw Chips LabEq Paper Boxes Trans Whlsl Rtail Meals Banks Insur
## 1 3.76 -99.99 1.31 1.35 -99.99 7.70 1.92 -23.79 0.07 1.87 4.61 -0.54
## 2 3.57 -99.99 -4.70 6.89 -99.99 -2.38 4.85 5.39 -0.75 -0.13 11.83 2.57
## 3 4.23 -99.99 -6.04 -0.63 -99.99 -5.54 0.08 -7.87 0.25 -0.56 -1.75 0.72
## 4 -2.47 -99.99 -3.92 0.21 -99.99 -5.08 -2.62 -15.38 -2.20 -4.11 -11.82 -4.28
## 5 3.28 -99.99 1.31 4.25 -99.99 3.84 1.61 4.67 6.52 4.33 -2.97 3.58
## 6 4.38 -99.99 -0.51 10.14 -99.99 -4.63 3.67 9.65 0.57 1.51 -1.06 3.27
## RlEst Fin Other Mkt_RF SMB HML RF MKT_RF_DIFF
## 1 2.89 -5.77 5.20 2.96 -2.38 -2.73 0.22 2.74
## 2 5.30 0.32 6.76 2.64 -1.47 4.14 0.25 2.39
## 3 -3.06 -4.81 -3.86 0.36 -1.39 0.12 0.23 0.13
## 4 -5.74 -0.94 -8.49 -3.24 -0.13 0.65 0.32 -3.56
## 5 2.21 5.13 4.00 2.53 -0.16 -0.38 0.31 2.22
## 6 7.95 -1.59 -2.34 2.62 -0.02 0.00 0.28 2.34
r_squared_9 <- c()
realized_return_9 <- c()
pred_return_9 <- c()
reg_9 <- c()
for (i in 2:50) {
reg_9[[i]] <- summary(lm(as.matrix(df_combined_49[i]) ~ as.matrix(MKT_RF_DIFF) + as.matrix(SMB) + as.matrix(HML) + as.matrix(RF), data = df_combined_49))
pred_return_9[[i]] <- mean(reg_9[[i]]$coefficients[2,1] * df_combined_49$MKT_RF_DIFF)
realized_return_9[[i]] <- mean(df_combined_49[ , i] - df_combined_49$RF)
r_squared_9[[i]] <- reg_9[[i]]$adj.r.squared
}
pred_return_9 <- as.data.frame(as.matrix(pred_return_9))
pred_return_9 <- pred_return_9[-c(1),]
pred_return_9 <- as.data.frame(as.list(pred_return_9), col.names = names(df_combined_49[2:50]), row.names = "Predicted_Returns")
realized_return_9 <- as.data.frame(as.matrix(realized_return_9))
realized_return_9 <- realized_return_9[-c(1),]
realized_return_9 <- as.data.frame(as.list(realized_return_9), col.names = names(df_combined_49[2:50]), row.names = "Realized_Returns")
realized_vs_pred_return_9 <- rbind(pred_return_9, realized_return_9)
realized_vs_pred_return_9 <- as.data.frame(t(realized_vs_pred_return_9))
realized_vs_pred_return_9
## Predicted_Returns Realized_Returns
## Agric 0.3690020 0.6897033
## Food 0.3147743 0.7044241
## Soda 0.2142908 -38.2995113
## Beer 0.3719956 0.9355323
## Smoke 0.2813273 0.8705585
## Toys 0.4407853 0.7620593
## Fun 0.5613260 1.0102792
## Books 0.4340629 0.7388133
## Hshld 0.3887106 0.6785079
## Clths 0.3220425 0.7021030
## Hlth 0.2863528 -44.7092234
## MedEq 0.3565161 0.8911693
## Drugs 0.3775667 0.8441187
## Chems 0.4548752 0.7954014
## Rubbr 0.3662613 -4.3570157
## Txtls 0.4186150 0.7131326
## BldMt 0.4729507 0.7568412
## Cnstr 0.5036145 0.8016492
## Steel 0.5339699 0.6712042
## FabPr 0.2392854 -38.5043717
## Mach 0.4988441 0.8294328
## ElcEq 0.5505322 0.9230279
## Autos 0.5232395 0.9452182
## Aero 0.5155849 1.1250611
## Ships 0.4516008 0.7391449
## Guns 0.2001709 -38.2834468
## Gold 0.1467824 -38.3938394
## Mines 0.3880681 0.7765794
## Coal 0.4681152 0.7653403
## Oil 0.3764917 0.7382112
## Util 0.3222166 0.6166579
## Telcm 0.2976187 0.5851134
## PerSv 0.3798745 -0.3957853
## BusSv 0.3622930 0.7523473
## Hardw 0.4887152 0.9697382
## Softw 0.3338181 -40.4757068
## Chips 0.5489626 0.9710995
## LabEq 0.4339411 0.9163525
## Paper 0.4328280 -2.0972513
## Boxes 0.4097480 0.8054887
## Trans 0.4406261 0.6747033
## Whlsl 0.4166664 0.5800000
## Rtail 0.4157324 0.8014834
## Meals 0.3815996 0.8206370
## Banks 0.4395205 0.8982199
## Insur 0.4726298 0.8107417
## RlEst 0.4639355 0.5896859
## Fin 0.5266363 0.8410733
## Other 0.4278090 0.4629058
ggplot(data = realized_vs_pred_return_9, aes(x = Predicted_Returns, y = Realized_Returns, label = rownames(realized_vs_pred_return_9))) +
geom_point(size = 2) +
geom_label_repel(fontface = 'bold', min.segment.length = 0, segment.color = 'black') +
ggtitle("Predicted Vs. Realized Excess Returns") +
theme(
plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11)
) +
geom_abline() +
scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(0.0, 0.6)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 15), limits = c(0.00, 1.1)) +
labs(y = "Realized Returns", x = "Predicted Returns")
r_squared_9 <- as.data.frame(as.matrix(r_squared_9))
r_squared_9 <- r_squared_9[-c(1),]
r_squared_9 <- as.data.frame(as.list(r_squared_9), col.names = names(df_combined_49[2:50]), row.names = "R_Squared")
r_squared_9 <- as.data.frame(t(r_squared_9))
r_squared_9 <- cbind(Industry_Portfolios = rownames(r_squared_9), r_squared_9)
rownames(r_squared_9) <- 1:nrow(r_squared_9)
r_squared_9
## Industry_Portfolios R_Squared
## 1 Agric 0.42715718
## 2 Food 0.66291402
## 3 Soda 0.23948000
## 4 Beer 0.49425207
## 5 Smoke 0.34922524
## 6 Toys 0.49113612
## 7 Fun 0.68209913
## 8 Books 0.60440792
## 9 Hshld 0.67764860
## 10 Clths 0.55696498
## 11 Hlth 0.19332690
## 12 MedEq 0.53771652
## 13 Drugs 0.62411791
## 14 Chems 0.78084801
## 15 Rubbr 0.07380009
## 16 Txtls 0.69456822
## 17 BldMt 0.82365613
## 18 Cnstr 0.64448573
## 19 Steel 0.75681215
## 20 FabPr 0.23899151
## 21 Mach 0.85092894
## 22 ElcEq 0.81421648
## 23 Autos 0.68663158
## 24 Aero 0.58762489
## 25 Ships 0.63643053
## 26 Guns 0.23947891
## 27 Gold 0.23250182
## 28 Mines 0.52800608
## 29 Coal 0.45738260
## 30 Oil 0.58602833
## 31 Util 0.58937463
## 32 Telcm 0.60381347
## 33 PerSv 0.20305815
## 34 BusSv 0.50744614
## 35 Hardw 0.68531102
## 36 Softw 0.23877173
## 37 Chips 0.72156230
## 38 LabEq 0.68390508
## 39 Paper 0.17398330
## 40 Boxes 0.68816557
## 41 Trans 0.79349957
## 42 Whlsl 0.68855719
## 43 Rtail 0.74914772
## 44 Meals 0.61996047
## 45 Banks 0.64003796
## 46 Insur 0.65899283
## 47 RlEst 0.58425468
## 48 Fin 0.83430303
## 49 Other 0.61008148
print(paste0("The average R Squared over the 49 Industry Portfolios is ", round(mean(r_squared_9$R_Squared), digits = 6), " under the Fama French 3 Factor Model"))
## [1] "The average R Squared over the 49 Industry Portfolios is 0.560144 under the Fama French 3 Factor Model"
r_df <- c(mean(r_squared_3$R_Squared), mean(r_squared_4$R_Squared), mean(r_squared_5$R_Squared), mean(r_squared_6$R_Squared), mean(r_squared_7$R_Squared), mean(r_squared_8$R_Squared), mean(r_squared_9$R_Squared))
ports <- c("Operating Profitability", "Investment", "Dividend Yield", "Momentum", "17 Industry", "30 Industry", "49 Industry")
r_df <- as.data.frame(r_df)
ports <- as.data.frame(ports)
r_df2 <- cbind(ports, r_df)
names(r_df2)[names(r_df2) == "ports"] <- "Portfolios"
names(r_df2)[names(r_df2) == "r_df"] <- "Average R Squared"
r_df2
## Portfolios Average R Squared
## 1 Operating Profitability 0.8961896
## 2 Investment 0.8929981
## 3 Dividend Yield 0.8649321
## 4 Momentum 0.8525294
## 5 17 Industry 0.7405800
## 6 30 Industry 0.6668066
## 7 49 Industry 0.5601442
From the table above, we can see that the Fama French 3 Factor Model is able to price the operating profitability, investment, dividend yield, and momentum portfolios quite well. It even prices the 17 Industry Portfolios rather well, though notably not as well as the first four sets. The model starts to struggle when it’s passed portfolios from the 30 Industry and 49 Industry sets. This is likely partly due to the fact that there are significant outliers present in these two data sets. Additionally, these sets of portfolios incorporate an extremely wide range of industries. Not every industry behaves the same and different industries are subject to different sets of factors that affect their prices.
I would add a monthly momentum factor which returns the value of the closing price on the last day of the month minus the closing price on the first day of the month. The resulting values will be either negative, positive, or zero. A positive value would indicate the portfolio has upwards momentum while a negative value would indicate the portfolio has a downwards momentum. A value of zero would indicate that the portfolio has no positive or negative momentum.
The Fama-French three factor model is a good model, but it is certainly not as good as it gets. Other models that incorporate momentum, dividend payout rate, and other factors have been proven to be more accurate at pricing assets. It should also be noted that the Black-Scholes formula is considered the king of asset pricing formulas.
While the Fama-French three factor model does tend to outperform the CAPM, we should not forget about CAPM entirely. It is a very versatile pricing model which gives us insight as to how the portfolio is related to the market index without taking into account other factors.
Empirically, it has been shown that neither the three factor model nor the five factor model is the best model. In general, implementation of the Black-Scholes formula via Bayesian statistics is the best way forward.
The validity of the CAPM model can be tested empirically, but not with absolute rigor. The reason for this is because the CAPM formula is heavily dependent on the market portfolio. Over time, the market portfolio has changed drastically and so it is difficult to empirically test the CAPM model.
library(readxl)
returns_daily <- read_excel("C:\\Users\\zerva\\Documents\\AM14\\PS1_Daily.xlsx", sheet = "HPR_daily", skip = 1)
prices_daily <- read_excel("C:\\Users\\zerva\\Documents\\AM14\\PS1_Daily.xlsx", sheet = "Prices_daily", skip = 1)
returns_daily$DATE <- parse_date_time(returns_daily$DATE, "ymd")
prices_daily$DATE <- parse_date_time(prices_daily$DATE, "ymd")
head(returns_daily)
## # A tibble: 6 x 9
## DATE MSFT XOM GE JPM INTC C
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1990-01-02 00:00:00 0.0201 0 0.0349 0.00418 0.0435 0.0307
## 2 1990-01-03 00:00:00 0.00563 -0.01 -0.00187 0.0333 -0.0278 0.0128
## 3 1990-01-04 00:00:00 0.0294 -0.0101 -0.00563 0.00403 0.0214 -0.0126
## 4 1990-01-05 00:00:00 -0.0245 -0.00510 -0.00943 0.00402 -0.00699 0.00851
## 5 1990-01-08 00:00:00 0.0153 0.0154 0.00571 0 0.0141 0.00844
## 6 1990-01-09 00:00:00 -0.00275 -0.0202 -0.0208 -0.032 0.0278 -0.0126
## # ... with 2 more variables: VWRETD <dbl>, SPRTRN <dbl>
log_returns <- returns_daily %>%
mutate(log_MSFT = log(1 + MSFT)) %>%
mutate(log_XOM = log(1 + XOM)) %>%
mutate(log_GE = log(1 + GE)) %>%
mutate(log_JPM = log(1 + JPM)) %>%
mutate(log_INTC = log(1 + INTC)) %>%
mutate(log_C = log(1 + C)) %>%
mutate(log_VWRETD = log(1 + VWRETD)) %>%
mutate(log_SPRTRN = log(1 + SPRTRN))
head(log_returns)
## # A tibble: 6 x 17
## DATE MSFT XOM GE JPM INTC C
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1990-01-02 00:00:00 0.0201 0 0.0349 0.00418 0.0435 0.0307
## 2 1990-01-03 00:00:00 0.00563 -0.01 -0.00187 0.0333 -0.0278 0.0128
## 3 1990-01-04 00:00:00 0.0294 -0.0101 -0.00563 0.00403 0.0214 -0.0126
## 4 1990-01-05 00:00:00 -0.0245 -0.00510 -0.00943 0.00402 -0.00699 0.00851
## 5 1990-01-08 00:00:00 0.0153 0.0154 0.00571 0 0.0141 0.00844
## 6 1990-01-09 00:00:00 -0.00275 -0.0202 -0.0208 -0.032 0.0278 -0.0126
## # ... with 10 more variables: VWRETD <dbl>, SPRTRN <dbl>, log_MSFT <dbl>,
## # log_XOM <dbl>, log_GE <dbl>, log_JPM <dbl>, log_INTC <dbl>, log_C <dbl>,
## # log_VWRETD <dbl>, log_SPRTRN <dbl>
log_returns <- subset(log_returns, select = -c(MSFT, XOM, GE, JPM, INTC, C, VWRETD, SPRTRN))
head(log_returns)
## # A tibble: 6 x 9
## DATE log_MSFT log_XOM log_GE log_JPM log_INTC log_C
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1990-01-02 00:00:00 0.0199 0 0.0343 0.00418 0.0426 0.0302
## 2 1990-01-03 00:00:00 0.00562 -0.0101 -0.00187 0.0328 -0.0282 0.0127
## 3 1990-01-04 00:00:00 0.0290 -0.0102 -0.00564 0.00402 0.0212 -0.0127
## 4 1990-01-05 00:00:00 -0.0248 -0.00512 -0.00948 0.00401 -0.00702 0.00847
## 5 1990-01-08 00:00:00 0.0152 0.0153 0.00570 0 0.0140 0.00840
## 6 1990-01-09 00:00:00 -0.00275 -0.0204 -0.0211 -0.0325 0.0274 -0.0126
## # ... with 2 more variables: log_VWRETD <dbl>, log_SPRTRN <dbl>
acf(log_returns$log_SPRTRN**2)
acf(abs(log_returns$log_SPRTRN))
library(roll)
## Warning: package 'roll' was built under R version 4.1.2
m_avg_10 <- as.data.frame(roll_sd(log_returns$log_MSFT, width = 50))
m_avg_10$Date <- log_returns$DATE
names(m_avg_10)[names(m_avg_10) == "roll_sd(log_returns$log_MSFT, width = 50)"] <- "MSFT_10"
m_avg_20 <- as.data.frame(roll_sd(log_returns$log_MSFT, width = 100))
m_avg_20$Date <- log_returns$DATE
names(m_avg_20)[names(m_avg_20) == "roll_sd(log_returns$log_MSFT, width = 100)"] <- "MSFT_20"
m_avg_combined <- left_join(m_avg_10, m_avg_20, by = "Date")
ggplot(data = m_avg_combined) +
geom_line(aes(x = Date, y = MSFT_10, color = "10 week window"), size = 1) +
geom_line(aes(x = Date, y = MSFT_20, color = "20 week window"), size = 1) +
ggtitle("MSFT 10 Vs 20 Week Window") +
theme(
plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11),
legend.title = element_text(size = 12, face = "bold"),
legend.text = element_text(size = 10.5),
legend.key.height = unit(1, 'cm'),
legend.key.width = unit(1.5, 'cm'),
legend.position = "right", legend.margin=margin(l = 30)
) +
labs(y = "MA of Volatility of Log Returns", x = "Date", colour = "Legend") +
guides(color = guide_legend(override.aes = list(size = 1.5))) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10), limits = c(-0.01, 0.06))
In light of Modern Portfolio Theory, the results from the above chart indicate that at times, even very secure assets can exhibit noteworthy volatility and so one must choose appropriate windows with which to analyze said volatility.
library(roll)
m_avg_10_2 <- as.data.frame(roll_sd(log_returns$log_SPRTRN, width = 50))
m_avg_10_2$Date <- log_returns$DATE
names(m_avg_10_2)[names(m_avg_10_2) == "roll_sd(log_returns$log_SPRTRN, width = 50)"] <- "SPRTRN_10"
m_avg_20_2 <- as.data.frame(roll_sd(log_returns$log_SPRTRN, width = 100))
m_avg_20_2$Date <- log_returns$DATE
names(m_avg_20_2)[names(m_avg_20_2) == "roll_sd(log_returns$log_SPRTRN, width = 100)"] <- "SPRTRN_20"
m_avg_combined_2 <- left_join(m_avg_10_2, m_avg_20_2, by = "Date")
ggplot(data = m_avg_combined_2) +
geom_line(aes(x = Date, y = SPRTRN_10, color = "10 week window"), size = 1) +
geom_line(aes(x = Date, y = SPRTRN_20, color = "20 week window"), size = 1) +
ggtitle("SPRTRN 10 Vs 20 Week Window") +
theme(
plot.title = element_text(size = 22, face = "bold", hjust = 0.5, vjust = 2.5),
axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 15)),
axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 15)),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11),
legend.title = element_text(size = 12, face = "bold"),
legend.text = element_text(size = 10.5),
legend.key.height = unit(1, 'cm'),
legend.key.width = unit(1.5, 'cm'),
legend.position = "right", legend.margin=margin(l = 30)
) +
labs(y = "MA of Volatility of Log Returns", x = "Date", colour = "Legend") +
guides(color = guide_legend(override.aes = list(size = 1.5))) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10), limits = c(-0.01, 0.06))
We can see that the market index exhibits much less volatility than Microsoft, which is to be expected as the market portfolio will not have any firm-specific risk involved.
library(qcc)
## Warning: package 'qcc' was built under R version 4.1.2
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
ewma(log_returns$log_SPRTRN, center = mean(log_returns$log_SPRTRN),lambda = 0.5, std.dev = 0.01)
## List of 15
## $ call : language ewma(data = log_returns$log_SPRTRN, center = mean(log_returns$log_SPRTRN), std.dev = 0.01, lambda = 0.5)
## $ type : chr "ewma"
## $ data.name : chr "log_returns$log_SPRTRN"
## $ data : num [1:6301, 1] 0.01764 -0.00259 -0.00865 -0.0098 0.0045 ...
## ..- attr(*, "dimnames")=List of 2
## $ statistics: Named num [1:6301] 0.01764 -0.00259 -0.00865 -0.0098 0.0045 ...
## ..- attr(*, "names")= chr [1:6301] "1" "2" "3" "4" ...
## $ sizes : int [1:6301] 1 1 1 1 1 1 1 1 1 1 ...
## $ center : num 0.00028
## $ std.dev : num 0.01
## $ x : int [1:6301] 1 2 3 4 5 6 7 8 9 10 ...
## $ y : Named num [1:6301] 0.008961 0.003186 -0.002732 -0.006268 -0.000882 ...
## ..- attr(*, "names")= chr [1:6301] "1" "2" "3" "4" ...
## $ sigma : num [1:6301] 0.005 0.00559 0.00573 0.00576 0.00577 ...
## $ lambda : num 0.5
## $ nsigmas : num 3
## $ limits : num [1:6301, 1:2] -0.0147 -0.0165 -0.0169 -0.017 -0.017 ...
## ..- attr(*, "dimnames")=List of 2
## $ violations: Named int [1:112] 151 164 265 476 1979 2030 2172 2189 2190 2191 ...
## ..- attr(*, "names")= chr [1:112] "151" "164" "265" "476" ...
## - attr(*, "class")= chr "ewma.qcc"
ewma(log_returns$log_SPRTRN, center = mean(log_returns$log_SPRTRN),lambda = 0.75, std.dev = 0.01)
## List of 15
## $ call : language ewma(data = log_returns$log_SPRTRN, center = mean(log_returns$log_SPRTRN), std.dev = 0.01, lambda = 0.75)
## $ type : chr "ewma"
## $ data.name : chr "log_returns$log_SPRTRN"
## $ data : num [1:6301, 1] 0.01764 -0.00259 -0.00865 -0.0098 0.0045 ...
## ..- attr(*, "dimnames")=List of 2
## $ statistics: Named num [1:6301] 0.01764 -0.00259 -0.00865 -0.0098 0.0045 ...
## ..- attr(*, "names")= chr [1:6301] "1" "2" "3" "4" ...
## $ sizes : int [1:6301] 1 1 1 1 1 1 1 1 1 1 ...
## $ center : num 0.00028
## $ std.dev : num 0.01
## $ x : int [1:6301] 1 2 3 4 5 6 7 8 9 10 ...
## $ y : Named num [1:6301] 0.0133 0.00138 -0.00614 -0.00889 0.00116 ...
## ..- attr(*, "names")= chr [1:6301] "1" "2" "3" "4" ...
## $ sigma : num [1:6301] 0.0075 0.00773 0.00775 0.00775 0.00775 ...
## $ lambda : num 0.75
## $ nsigmas : num 3
## $ limits : num [1:6301, 1:2] -0.0222 -0.0229 -0.023 -0.023 -0.023 ...
## ..- attr(*, "dimnames")=List of 2
## $ violations: Named int [1:131] 151 164 166 190 265 476 1565 1979 2030 2172 ...
## ..- attr(*, "names")= chr [1:131] "151" "164" "166" "190" ...
## - attr(*, "class")= chr "ewma.qcc"
ewma(log_returns$log_SPRTRN, center = mean(log_returns$log_SPRTRN),lambda = 0.94, std.dev = 0.01)
## List of 15
## $ call : language ewma(data = log_returns$log_SPRTRN, center = mean(log_returns$log_SPRTRN), std.dev = 0.01, lambda = 0.94)
## $ type : chr "ewma"
## $ data.name : chr "log_returns$log_SPRTRN"
## $ data : num [1:6301, 1] 0.01764 -0.00259 -0.00865 -0.0098 0.0045 ...
## ..- attr(*, "dimnames")=List of 2
## $ statistics: Named num [1:6301] 0.01764 -0.00259 -0.00865 -0.0098 0.0045 ...
## ..- attr(*, "names")= chr [1:6301] "1" "2" "3" "4" ...
## $ sizes : int [1:6301] 1 1 1 1 1 1 1 1 1 1 ...
## $ center : num 0.00028
## $ std.dev : num 0.01
## $ x : int [1:6301] 1 2 3 4 5 6 7 8 9 10 ...
## $ y : Named num [1:6301] 0.0166 -0.00144 -0.00822 -0.00971 0.00365 ...
## ..- attr(*, "names")= chr [1:6301] "1" "2" "3" "4" ...
## $ sigma : num [1:6301] 0.0094 0.00942 0.00942 0.00942 0.00942 ...
## $ lambda : num 0.94
## $ nsigmas : num 3
## $ limits : num [1:6301, 1:2] -0.0279 -0.028 -0.028 -0.028 -0.028 ...
## ..- attr(*, "dimnames")=List of 2
## $ violations: Named int [1:148] 151 164 166 265 476 1565 1940 1979 1980 2030 ...
## ..- attr(*, "names")= chr [1:148] "151" "164" "166" "265" ...
## - attr(*, "class")= chr "ewma.qcc"
Considering the plot above, a sensible choice for the variance would be 0.0001. As the variance is increased, the confidence intervals become wider. The same thing happens when lambda is increased. As lambda is increased, the more recent observations receive heavier and heavier weights as compared to earlier observations and so the confidence intervals become wider.